home *** CD-ROM | disk | FTP | other *** search
Wrap
# Source Generated with Decompyle++ # File: in.pyc (Python 2.5) """httpclient.py Implements an HTTP client. The main way that this module is used is the grabURL function that's an asynchronous version of our old grabURL. A lot of the code here comes from inspection of the httplib standard module. Some of it was taken more-or-less directly from there. I (Ben Dean-Kawamura) believe our clients follow the HTTP 1.1 spec completely, I used RFC2616 as a reference (http://www.w3.org/Protocols/rfc2616/rfc2616.html). """ import errno import logging import re import socket import traceback from urlparse import urljoin from gtcache import gettext as _ from base64 import b64encode from clock import clock import httpauth import config import prefs from download_utils import URIPattern, cleanFilename, parseURL, defaultPort, getFileURLPath, filenameFromURL from xhtmltools import URLEncodeDict, multipartEncode import eventloop import util import sys import time import urllib PIPELINING_ENABLED = True SOCKET_READ_TIMEOUT = 60 SOCKET_INITIAL_READ_TIMEOUT = 30 SOCKET_CONNECT_TIMEOUT = 15 class NetworkError(Exception): '''Base class for all errors that will be passed to errbacks from getURL and friends. NetworkErrors can be display in 2 ways: getFriendlyDescription() -- short, newbie friendly description getLongDescription() -- detailed description ''' def __init__(self, shortDescription, longDescription = None): if longDescription is None: longDescription = shortDescription self.friendlyDescription = _('Error: %s') % shortDescription self.longDescription = longDescription def getFriendlyDescription(self): return self.friendlyDescription def getLongDescription(self): return self.longDescription def __str__(self): return '%s: %s -- %s' % (self.__class__, self.getFriendlyDescription(), self.getLongDescription()) class ConnectionError(NetworkError): def __init__(self, errorMessage): self.friendlyDescription = _("Can't connect") self.longDescription = _('Connection Error: %s') % util.stringify(errorMessage, 'replace') class SSLConnectionError(ConnectionError): def __init__(self): self.friendlyDescription = _("Can't connect") self.longDescription = _('SSL connection error') class HTTPError(NetworkError): def __init__(self, longDescription): NetworkError.__init__(self, _('HTTP error'), longDescription) class BadStatusLine(HTTPError): def __init__(self, line): HTTPError.__init__(self, _('Bad Status Line: %s') % line) class BadHeaderLine(HTTPError): def __init__(self, line): HTTPError.__init__(self, _('Bad Header Line: %s') % line) class BadChunkSize(HTTPError): def __init__(self, line): HTTPError.__init__(self, _('Bad Chunk size: %s') % line) class CRLFExpected(HTTPError): def __init__(self, crlf): HTTPError.__init__(self, _('Expected CRLF got: %r') % crlf) class ServerClosedConnection(HTTPError): def __init__(self, host): HTTPError.__init__(self, _('%s closed connection') % host) class UnexpectedStatusCode(HTTPError): def __init__(self, code): if code == 404: self.friendlyDescription = _('File not found') self.longDescription = _('Got 404 status code') else: HTTPError.__init__(self, _('Bad Status Code: %s') % code) class AuthorizationFailed(NetworkError): def __init__(self): NetworkError.__init__(self, _('Authorization failed')) class PipelinedRequestNeverStarted(NetworkError): def __init__(self): NetworkError.__init__(self, _('Internal Error'), _('Pipeline request never started')) class ConnectionTimeout(NetworkError): def __init__(self, host): NetworkError.__init__(self, _('Timeout'), _('Connection to %s timed out') % host) class MalformedURL(NetworkError): def __init__(self, url): NetworkError.__init__(self, _('Invalid URL'), _('"%s" is not a valid URL') % url) class FileURLNotFoundError(NetworkError): """A file: URL doesn't exist""" def __init__(self, path): NetworkError.__init__(self, _('File not found'), _('The file: "%s" doesn\'t exist') % path) class FileURLReadError(NetworkError): def __init__(self, path): NetworkError.__init__(self, _('Read error'), _('Error while reading from "%s"') % path) def trapCall(object, function, *args, **kwargs): """Convenience function do a util.trapCall, where when = 'While talking to the network' """ return util.timeTrapCall('Calling %s on %s' % (function, object), function, *args, **kwargs) DATEINFUTURE = time.mktime((2030, 7, 12, 12, 0, 0, 4, 193, -1)) def get_cookie_expiration_date(val): """Tries a bunch of possible cookie expiration date formats until it finds the magic one (or doesn't and returns 0). """ fmts = ('%a, %d %b %Y %H:%M:%S %Z', '%a, %d %b %y %H:%M:%S %Z', '%a, %d-%b-%Y %H:%M:%S %Z', '%a, %d-%b-%y %H:%M:%S %Z') for fmt in fmts: try: return time.mktime(time.strptime(val, fmt)) continue except OverflowError: oe = None return DATEINFUTURE continue except ValueError: ve = None continue print "DTV: Warning: Can't process cookie expiration: '%s'" % val return 0 class NetworkBuffer(object): '''Responsible for storing incomming network data and doing some basic parsing of it. I think this is about as fast as we can do things in pure python, someday we may want to make it C... ''' def __init__(self): self.chunks = [] self.length = 0 def addData(self, data): self.chunks.append(data) self.length += len(data) def _mergeChunks(self): self.chunks = [ ''.join(self.chunks)] def read(self, size = None): '''Read at most size bytes from the data that has been added to the buffer. ''' self._mergeChunks() if size is not None: rv = self.chunks[0][:size] self.chunks[0] = self.chunks[0][len(rv):] else: rv = self.chunks[0] self.chunks = [] self.length -= len(rv) return rv def readline(self): '''Like a file readline, with several difference: * If there isn\'t a full line ready to be read we return None. * Doesn\'t include the trailing line separator. * Both "\r " and " " act as a line ender ''' self._mergeChunks() split = self.chunks[0].split('\n', 1) if len(split) == 2: self.chunks[0] = split[1] self.length = len(self.chunks[0]) if split[0].endswith('\r'): return split[0][:-1] else: return split[0] else: return None def unread(self, data): '''Put back read data. This make is like the data was never read at all. ''' self.chunks.insert(0, data) self.length += len(data) def getValue(self): self._mergeChunks() return self.chunks[0] class _Packet(object): '''A packet of data for the AsyncSocket class ''' def __init__(self, data, callback = None): self.data = data self.callback = callback class AsyncSocket(object): '''Socket class that uses our new fangled asynchronous eventloop module. ''' MEMORY_ERROR_LIMIT = 5 def __init__(self, closeCallback = None): '''Create an AsyncSocket. If closeCallback is given, it will be called if we detect that the socket has been closed durring a read/write operation. The arguments will be the AsyncSocket object and either socket.SHUT_RD or socket.SHUT_WR. ''' self.toSend = [] self.readSize = 4096 self.socket = None self.readCallback = None self.closeCallback = closeCallback self.readTimeout = None self.timedOut = False self.connectionErrback = None self.disableReadTimeout = False self.readSomeData = False self.name = '' self.lastClock = None self.memoryErrors = 0 def __str__(self): if self.name: return '%s: %s' % (type(self).__name__, self.name) else: return 'Unknown %s' % (type(self).__name__,) def startReadTimeout(self): if self.disableReadTimeout: return None self.lastClock = clock() if self.readTimeout is not None: return None self.readTimeout = eventloop.addTimeout(SOCKET_INITIAL_READ_TIMEOUT, self.onReadTimeout, 'AsyncSocket.onReadTimeout') def stopReadTimeout(self): if self.readTimeout is not None: self.readTimeout.cancel() self.readTimeout = None def openConnection(self, host, port, callback, errback, disableReadTimeout = None): '''Open a connection. On success, callback will be called with this object. ''' if disableReadTimeout is not None: self.disableReadTimeout = disableReadTimeout self.name = 'Outgoing %s:%s' % (host, port) try: self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) except socket.error: e = None trapCall(self, errback, ConnectionError(e[1])) return None self.socket.setblocking(0) self.connectionErrback = errback def handleGetHostByNameException(e): trapCall(self, errback, ConnectionError(e[1])) def onAddressLookup(address): if self.socket is None: return None try: rv = self.socket.connect_ex((address, port)) except socket.gaierror: trapCall(self, errback, ConnectionError('gaierror')) return None if rv in (0, errno.EINPROGRESS, errno.EWOULDBLOCK): eventloop.addWriteCallback(self.socket, onWriteReady) self.socketConnectTimeout = eventloop.addTimeout(SOCKET_CONNECT_TIMEOUT, onWriteTimeout, 'socket connect timeout') else: msg = errno.errorcode[rv] trapCall(self, errback, ConnectionError(msg)) def onWriteReady(): eventloop.removeWriteCallback(self.socket) self.socketConnectTimeout.cancel() rv = self.socket.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR) if rv == 0: trapCall(self, callback, self) else: msg = errno.errorcode.get(rv, _('Unknown Error code')) trapCall(self, errback, ConnectionError(msg)) self.connectionErrback = None def onWriteTimeout(): eventloop.removeWriteCallback(self.socket) trapCall(self, errback, ConnectionTimeout(host)) self.connectionErrback = None eventloop.callInThread(onAddressLookup, handleGetHostByNameException, socket.gethostbyname, 'getHostByName - %s' % host, host) def acceptConnection(self, host, port, callback, errback): def finishAccept(): eventloop.removeReadCallback(self.socket) (self.socket, addr) = self.socket.accept() trapCall(self, callback, self) self.connectionErrback = None self.name = 'Incoming %s:%s' % (host, port) self.connectionErrback = errback self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) self.socket.bind((host, port)) (self.addr, self.port) = self.socket.getsockname() self.socket.listen(63) eventloop.addReadCallback(self.socket, finishAccept) def closeConnection(self): if self.isOpen(): eventloop.stopHandlingSocket(self.socket) self.stopReadTimeout() self.socket.close() self.socket = None if self.connectionErrback is not None: error = NetworkError(_('Connection closed')) trapCall(self, self.connectionErrback, error) self.connectionErrback = None def isOpen(self): return self.socket is not None def sendData(self, data, callback = None): '''Send data out to the socket when it becomes ready. NOTE: currently we have no way of detecting when the data gets sent out, or if errors happen. ''' if not self.isOpen(): raise ValueError('Socket not connected') self.toSend.append(_Packet(data, callback)) eventloop.addWriteCallback(self.socket, self.onWriteReady) def startReading(self, readCallback): '''Start reading from the socket. When data becomes available it will be passed to readCallback. If there is already a read callback, it will be replaced. ''' if not self.isOpen(): raise ValueError('Socket not connected') self.readCallback = readCallback eventloop.addReadCallback(self.socket, self.onReadReady) self.startReadTimeout() def stopReading(self): '''Stop reading from the socket.''' if not self.isOpen(): raise ValueError('Socket not connected') self.readCallback = None eventloop.removeReadCallback(self.socket) self.stopReadTimeout() def onReadTimeout(self): if self.readSomeData: timeout = SOCKET_READ_TIMEOUT else: timeout = SOCKET_INITIAL_READ_TIMEOUT if clock() < self.lastClock + timeout: self.readTimeout = eventloop.addTimeout(self.lastClock + timeout - clock(), self.onReadTimeout, 'AsyncSocket.onReadTimeout') else: self.readTimeout = None self.timedOut = True self.handleEarlyClose('read') def handleSocketError(self, code, msg, operation): if code in (errno.EWOULDBLOCK, errno.EINTR): return None if operation == 'write': expectedErrors = (errno.EPIPE, errno.ECONNRESET) else: expectedErrors = (errno.ECONNREFUSED, errno.ECONNRESET) if code not in expectedErrors: print 'WARNING, got unexpected error during %s' % operation print '%s: %s' % (errno.errorcode.get(code), msg) self.handleEarlyClose(operation) def onWriteReady(self): try: if len(self.toSend) > 0: sent = self.socket.send(self.toSend[0].data) else: sent = 0 except socket.error: (code, msg) = None self.handleSocketError(code, msg, 'write') self.handleSentData(sent) def handleSentData(self, sent): if len(self.toSend) > 0: self.toSend[0].data = self.toSend[0].data[sent:] if len(self.toSend[0].data) == 0: if self.toSend[0].callback: self.toSend[0].callback() self.toSend = self.toSend[1:] if len(self.toSend) == 0: eventloop.removeWriteCallback(self.socket) def onReadReady(self): try: data = self.socket.recv(self.readSize) except socket.error: (code, msg) = None self.handleSocketError(code, msg, 'read') except MemoryError: self.memoryErrors += 1 if self.memoryErrors > self.MEMORY_ERROR_LIMIT: print 'ERROR: Too many MemoryErrors on %s' % self self.handleEarlyClose('read') else: print 'WARNING: Memory error while reading from %s' % self except: self.memoryErrors > self.MEMORY_ERROR_LIMIT self.memoryErrors = 0 self.handleReadData(data) def handleReadData(self, data): self.startReadTimeout() if data == '': if self.closeCallback: trapCall(self, self.closeCallback, self, socket.SHUT_RD) else: self.readSomeData = True trapCall(self, self.readCallback, data) def handleEarlyClose(self, operation): self.closeConnection() if self.closeCallback: if operation == 'read': type = socket.SHUT_RD else: type = socket.SHUT_WR trapCall(self, self.closeCallback, self, type) class AsyncSSLStream(AsyncSocket): def __init__(self, closeCallback = None): super(AsyncSSLStream, self).__init__(closeCallback) self.interruptedOperation = None def openConnection(self, host, port, callback, errback, disableReadTimeout = None): def onSocketOpen(self): self.socket.setblocking(1) eventloop.callInThread(onSSLOpen, handleSSLError, socket.ssl, 'AsyncSSL onSocketOpen()', self.socket) def onSSLOpen(ssl): if self.socket is None: return None self.socket.setblocking(0) self.ssl = ssl callback(self) def handleSSLError(error): errback(SSLConnectionError()) super(AsyncSSLStream, self).openConnection(host, port, onSocketOpen, errback, disableReadTimeout) def resumeNormalCallbacks(self): if self.readCallback is not None: eventloop.addReadCallback(self.socket, self.onReadReady) if len(self.toSend) != 0: eventloop.addWriteCallback(self.socket, self.onWriteReady) def handleSocketError(self, code, msg, operation): if code in (socket.SSL_ERROR_WANT_READ, socket.SSL_ERROR_WANT_WRITE): if self.interruptedOperation is None: self.interruptedOperation = operation elif self.interruptedOperation != operation: util.failed('When talking to the network', details = 'socket error for the wrong SSL operation') self.closeConnection() return None eventloop.stopHandlingSocket(self.socket) if code == socket.SSL_ERROR_WANT_READ: eventloop.addReadCallback(self.socket, self.onReadReady) else: eventloop.addWriteCallback(self.socket, self.onWriteReady) elif code in (socket.SSL_ERROR_ZERO_RETURN, socket.SSL_ERROR_SSL, socket.SSL_ERROR_SYSCALL, socket.SSL_ERROR_EOF): self.handleEarlyClose(operation) else: super(AsyncSSLStream, self).handleSocketError(code, msg, operation) def onWriteReady(self): if self.interruptedOperation == 'read': return self.onReadReady() try: if len(self.toSend) > 0: sent = self.ssl.write(self.toSend[0].data) else: sent = 0 except socket.error: (code, msg) = None self.handleSocketError(code, msg, 'write') if self.interruptedOperation == 'write': self.resumeNormalCallbacks() self.interruptedOperation = None self.handleSentData(sent) def onReadReady(self): if self.interruptedOperation == 'write': return self.onWriteReady() try: data = self.ssl.read(self.readSize) except socket.error: (code, msg) = None self.handleSocketError(code, msg, 'read') if self.interruptedOperation == 'read': self.resumeNormalCallbacks() self.interruptedOperation = None self.handleReadData(data) class ProxiedAsyncSSLStream(AsyncSSLStream): def openConnection(self, host, port, callback, errback, disableReadTimeout): def onSocketOpen(self): self.socket.setblocking(1) None(eventloop.callInThread, (onSSLOpen, handleSSLError), (lambda : openProxyConnection(self)), 'ProxiedAsyncSSL openProxyConnection()') def openProxyConnection(self): headers = { 'User-Agent': '%s/%s (%s)' % (config.get(prefs.SHORT_APP_NAME), config.get(prefs.APP_VERSION), config.get(prefs.PROJECT_URL)), 'Host': host } if config.get(prefs.HTTP_PROXY_AUTHORIZATION_ACTIVE): username = config.get(prefs.HTTP_PROXY_AUTHORIZATION_USERNAME) password = config.get(prefs.HTTP_PROXY_AUTHORIZATION_PASSWORD) authString = username + ':' + password authString = b64encode(authString) headers['ProxyAuthorization'] = 'Basic ' + authString connectString = 'CONNECT %s:%d HTTP/1.1\r\n' % (host, port) for header, value in headers.items(): connectString += '%s: %s\r\n' % (header, value) connectString += '\r\n' try: self.socket.send(connectString) data = '' while data.find('\r\n\r\n') == -1: data += self.socket.recv(1) data = data.split('\r\n') if -1 == data[0].find(' 200 '): (None, eventloop.addIdle)((lambda : handleSSLError(NetworkError(data[0]))), 'Network Error') else: return socket.ssl(self.socket) except socket.error: (code, msg) = None handleSSLError(msg) def onSSLOpen(ssl): if self.socket is None or ssl is None: return None self.socket.setblocking(0) self.ssl = ssl callback(self) def handleSSLError(error): errback(SSLConnectionError()) proxy_host = config.get(prefs.HTTP_PROXY_HOST) proxy_port = config.get(prefs.HTTP_PROXY_PORT) AsyncSocket.openConnection(self, proxy_host, proxy_port, onSocketOpen, errback, disableReadTimeout) class ConnectionHandler(object): """Base class to handle asynchronous network streams. It implements a simple state machine to deal with incomming data. Sending data: Use the sendData() method. Reading Data: Add entries to the state dictionary, which maps strings to methods. The state methods will be called when there is data available, which can be read from the buffer variable. The states dictionary can contain a None value, to signal that the handler isn't interested in reading at that point. Use changeState() to switch states. Subclasses should override tho the handleClose() method to handle the socket closing. """ streamFactory = AsyncSocket def __init__(self): self.buffer = NetworkBuffer() self.states = { 'initializing': None, 'closed': None } self.stream = self.streamFactory(closeCallback = self.closeCallback) self.changeState('initializing') self.name = '' def __str__(self): return '%s -- %s' % (self.__class__, self.state) def openConnection(self, host, port, callback, errback, disableReadTimeout = None): self.name = 'Outgoing %s:%s' % (host, port) self.host = host self.port = port def callbackIntercept(asyncSocket): if callback: trapCall(self, callback, self) self.stream.openConnection(host, port, callbackIntercept, errback, disableReadTimeout) def closeConnection(self): if self.stream.isOpen(): self.stream.closeConnection() self.changeState('closed') def sendData(self, data, callback = None): self.stream.sendData(data, callback) def changeState(self, newState): self.readHandler = self.states[newState] self.state = newState self.updateReadCallback() def updateReadCallback(self): if self.readHandler is not None: self.stream.startReading(self.handleData) elif self.stream.isOpen(): try: self.stream.stopReading() except KeyError: pass except: None<EXCEPTION MATCH>KeyError None<EXCEPTION MATCH>KeyError def handleData(self, data): self.buffer.addData(data) lastState = self.state self.readHandler() while self.readHandler is not None and lastState != self.state: lastState = self.state self.readHandler() def closeCallback(self, stream, type): self.handleClose(type) def handleClose(self, type): '''Handle our stream becoming closed. Type is either socket.SHUT_RD, or socket.SHUT_WR. ''' raise NotImplementedError() class HTTPConnection(ConnectionHandler): scheme = 'http' def __init__(self, closeCallback = None, readyCallback = None): super(HTTPConnection, self).__init__() self.shortVersion = 0 self.states['ready'] = None self.states['response-status'] = self.onStatusData self.states['response-headers'] = self.onHeaderData self.states['response-body'] = self.onBodyData self.states['chunk-size'] = self.onChunkSizeData self.states['chunk-data'] = self.onChunkData self.states['chunk-crlf'] = self.onChunkCRLFData self.states['chunk-trailer'] = self.onChunkTrailerData self.changeState('ready') self.idleSince = clock() self.unparsedHeaderLine = '' self.pipelinedRequest = None self.closeCallback = closeCallback self.readyCallback = readyCallback self.requestsFinished = 0 self.bytesRead = 0 self.sentReadyCallback = False self.headerCallback = None self.bodyDataCallback = None def handleData(self, data): self.bytesRead += len(data) super(HTTPConnection, self).handleData(data) def closeConnection(self): super(HTTPConnection, self).closeConnection() if self.closeCallback is not None: self.closeCallback(self) self.closeCallback = None self.checkPipelineNotStarted() def checkPipelineNotStarted(self): '''Call this when the connection is closed by Democracy or the other side. It will check if we have an unstarted pipeline request and send it the PipelinedRequestNeverStarted error ''' if self.pipelinedRequest is not None: errback = self.pipelinedRequest[1] trapCall(self, errback, PipelinedRequestNeverStarted()) self.pipelinedRequest = None def canSendRequest(self): if self.state == 'ready' and self.state != 'closed' and self.pipelinedRequest is None and not (self.willClose): pass return PIPELINING_ENABLED def sendRequest(self, callback, errback, host, port, requestStartCallback = None, headerCallback = None, bodyDataCallback = None, method = 'GET', path = '/', headers = None, postVariables = None, postFiles = None): """Sending an HTTP Request. callback will be called if the request completes normally, errback will be called if there is a network error. Callback will be passed a dictionary that represents the HTTP response, it will have an entry for each header sent by the server as well as as the following keys: body, version, status, reason, method, path, host, port They should be self explanatory, status and port will be integers, the other items will be strings. If requestStartCallback is given, it will be called just before the we start receiving data for the request (this can be a while after sending the request in the case of pipelined requests). It will be passed this connection object. If headerCallback is given, it will be called when the headers are read in. It will be passed a response object whose body is set to None. If bodyDataCallback is given it will be called as we read in the data for the body. Also, the connection won't store the body in memory, and the callback is called, it will be passed None for the body. postVariables is a dictionary of variable names to values postFiles is a dictionary of variable names to dictionaries containing filename, mimetype, and handle attributes. Handle should be an already open file handle. """ if not self.canSendRequest(): raise NetworkError(_('Unknown'), _('Internal Error: Not ready to send')) if headers is None: headers = { } else: headers = headers.copy() headers['Host'] = host.encode('idna') if port != defaultPort(self.scheme): headers['Host'] += ':%d' % port headers['Accept-Encoding'] = 'identity' if method == 'POST' and postVariables is not None and len(postVariables) > 0 and postFiles is None: postData = URLEncodeDict(postVariables) headers['Content-Type'] = 'application/x-www-form-urlencoded' headers['Content-Length'] = '%d' % len(postData) elif method == 'POST' and postFiles is not None: (postData, boundary) = multipartEncode(postVariables, postFiles) headers['Content-Type'] = 'multipart/form-data; boundary=%s' % boundary headers['Content-Length'] = '%d' % len(postData) else: postData = None self.sendRequestData(method, path, headers, postData) args = (callback, errback, requestStartCallback, headerCallback, bodyDataCallback, method, path, headers) if self.state == 'ready': self.startNewRequest(*args) else: self.pipelinedRequest = args def startNewRequest(self, callback, errback, requestStartCallback, headerCallback, bodyDataCallback, method, path, headers): """Called when we're ready to start processing a new request, either because one has just been made, or because we've pipelined one, and the previous request is done. """ if requestStartCallback: trapCall(self, requestStartCallback, self) if self.state == 'closed': return None self.callback = callback self.errback = errback self.headerCallback = headerCallback self.bodyDataCallback = bodyDataCallback self.method = method self.path = path self.requestHeaders = headers self.headers = { } self.contentLength = None self.version = None self.status = None self.reason = None self.bytesRead = 0 self.body = '' self.willClose = True self.chunked = False self.chunks = [] self.idleSince = None self.sentReadyCallback = False self.changeState('response-status') def sendRequestData(self, method, path, headers, data = None): sendOut = [] path = path.encode('ascii', 'replace') path = urllib.quote(path, safe = "-_.!~*'();/?:@&=+$,%#") sendOut.append('%s %s HTTP/1.1\r\n' % (method, path)) for header, value in headers.items(): sendOut.append('%s: %s\r\n' % (header, value)) sendOut.append('\r\n') if data is not None: sendOut.append(data) self.sendData(''.join(sendOut)) def onStatusData(self): line = self.buffer.readline() if line is not None: self.handleStatusLine(line) if self.state == 'closed': return None if self.shortVersion != 9: self.changeState('response-headers') else: self.startBody() def onHeaderData(self): while self.state == 'response-headers': line = self.buffer.readline() if line is None: break self.handleHeaderLine(line) def onBodyData(self): if self.bodyDataCallback: if self.contentLength is None: data = self.buffer.read() else: bytesLeft = self.contentLength - self.bodyBytesRead data = self.buffer.read(bytesLeft) if data == '': return None self.bodyBytesRead += len(data) trapCall(self, self.bodyDataCallback, data) if self.state == 'closed': return None if self.contentLength is not None and self.bodyBytesRead == self.contentLength: self.finishRequest() elif self.contentLength is not None and self.buffer.length >= self.contentLength: self.body = self.buffer.read(self.contentLength) self.finishRequest() def onChunkSizeData(self): line = self.buffer.readline() if line is not None: sizeString = line.split(';', 1)[0] try: self.chunkSize = int(sizeString, 16) except ValueError: self.handleError(BadChunkSize(line)) return None if self.chunkSize != 0: self.chunkBytesRead = 0 self.changeState('chunk-data') else: self.changeState('chunk-trailer') def onChunkData(self): if self.bodyDataCallback: bytesLeft = self.chunkSize - self.chunkBytesRead data = self.buffer.read(bytesLeft) self.chunkBytesRead += len(data) if data == '': return None trapCall(self, self.bodyDataCallback, data) if self.chunkBytesRead == self.chunkSize: self.changeState('chunk-crlf') elif self.buffer.length >= self.chunkSize: self.chunks.append(self.buffer.read(self.chunkSize)) self.changeState('chunk-crlf') def onChunkCRLFData(self): if self.buffer.length >= 2: crlf = self.buffer.read(2) if crlf != '\r\n': self.handleError(CRLFExpected(crlf)) else: self.changeState('chunk-size') def onChunkTrailerData(self): line = self.buffer.readline() while line is not None: if line == '': self.finishRequest() break line = self.buffer.readline() def handleStatusLine(self, line): try: (version, status, reason) = line.split(None, 2) except ValueError: try: (version, status) = line.split(None, 1) reason = '' except ValueError: version = '' except: None<EXCEPTION MATCH>ValueError None<EXCEPTION MATCH>ValueError if not version.startswith('HTTP/'): self.buffer.unread(line + '\r\n') self.version = 'HTTP/0.9' self.status = 200 self.reason = '' self.shortVersion = 9 else: try: status = int(status) if status < 100 or status > 599: self.handleError(BadStatusLine(line)) return None except ValueError: self.handleError(BadStatusLine(line)) return None if version == 'HTTP/1.0': self.shortVersion = 10 elif version.startswith('HTTP/1.'): self.shortVersion = 11 else: self.handleError(BadStatusLine(line)) return None self.version = version self.status = status self.reason = reason def handleHeaderLine(self, line): if self.unparsedHeaderLine == '': if line == '': if self.status != 100: self.startBody() else: self.changeState('response-status') elif ':' in line: self.parseHeader(line) else: self.unparsedHeaderLine = line elif len(line) > 0 and line[0] in (' ', '\t'): self.unparsedHeaderLine += line.lstrip() if ':' in self.unparsedHeaderLine: self.parseHeader(self.unparsedHeaderLine) self.unparsedHeaderLine = '' else: msg = 'line: %s, next line: %s' % (self.unparsedHeaderLine, line) self.handleError(BadHeaderLine(msg)) def parseHeader(self, line): (header, value) = line.split(':', 1) value = value.strip() header = header.lstrip().lower() if value == '': print 'DTV: Warning: Bad Header from %s://%s:%s%s (%s)' % (self.scheme, self.host, self.port, self.path, line) if header not in self.headers: self.headers[header] = value else: self.headers[header] += ',%s' % value def startBody(self): self.findExpectedLength() self.checkChunked() self.decideWillClose() if self.headerCallback: trapCall(self, self.headerCallback, self.makeResponse()) if self.state == 'closed': return None if self.status <= self.status: pass elif self.status <= 199 and self.status in (204, 304) and self.method == 'HEAD' or self.contentLength == 0: self.finishRequest() elif self.bodyDataCallback: self.bodyBytesRead = 0 if not self.chunked: self.changeState('response-body') else: self.changeState('chunk-size') self.maybeSendReadyCallback() def checkChunked(self): te = self.headers.get('transfer-encoding', '') self.chunked = te.lower() == 'chunked' def findExpectedLength(self): self.contentLength = None if self.status == 416: try: contentRange = self.headers['content-range'] except KeyError: pass m = re.search('bytes\\s+\\*/(\\d+)', contentRange) if m is not None: try: self.contentLength = int(m.group(1)) except (ValueError, TypeError): pass except: None<EXCEPTION MATCH>(ValueError, TypeError) None<EXCEPTION MATCH>(ValueError, TypeError) if self.contentLength is None and self.headers.get('transfer-encoding') in ('identity', None): try: self.contentLength = int(self.headers['content-length']) except (ValueError, KeyError): pass except: None<EXCEPTION MATCH>(ValueError, KeyError) None<EXCEPTION MATCH>(ValueError, KeyError) if self.contentLength < 0: self.contentLength = None def decideWillClose(self): if self.shortVersion != 11: self.willClose = True elif 'close' in self.headers.get('connection', '').lower(): self.willClose = True elif not (self.chunked) and self.contentLength is None: self.willClose = True else: self.willClose = False def finishRequest(self): origCallback = self.callback if self.bodyDataCallback: body = None elif self.chunked: body = ''.join(self.chunks) else: body = self.body response = self.makeResponse(body) if self.stream.isOpen(): if self.willClose: self.closeConnection() self.changeState('closed') elif self.pipelinedRequest is not None: req = self.pipelinedRequest self.pipelinedRequest = None self.startNewRequest(*req) else: self.changeState('ready') self.idleSince = clock() trapCall(self, origCallback, response) self.requestsFinished += 1 self.maybeSendReadyCallback() def makeResponse(self, body = None): response = self.headers.copy() response['body'] = body for key in ('version', 'status', 'reason', 'method', 'path', 'host', 'port', 'contentLength'): response[key] = getattr(self, key) return response def maybeSendReadyCallback(self): if self.readyCallback and self.canSendRequest() and not (self.sentReadyCallback): self.sentReadyCallback = True (eventloop.addIdle,)((lambda : self.readyCallback(self)), 'Ready Callback %s' % str(self)) def handleClose(self, type): oldState = self.state self.closeConnection() if oldState == 'response-body' and self.contentLength is None: self.body = self.buffer.read() self.finishRequest() elif self.stream.timedOut: self.errback(ConnectionTimeout(self.host)) else: self.errback(ServerClosedConnection(self.host)) self.checkPipelineNotStarted() def handleError(self, error): self.closeConnection() trapCall(self, self.errback, error) class HTTPSConnection(HTTPConnection): streamFactory = AsyncSSLStream scheme = 'https' class ProxyHTTPSConnection(HTTPConnection): streamFactory = ProxiedAsyncSSLStream scheme = 'https' class HTTPConnectionPool(object): '''Handle a pool of HTTP connections. We use the following stategy to handle new requests: * If there is an connection on the server that\'s ready to send, use that. * If we haven\'t hit our connection limits, create a new request * When a connection becomes closed, we look for our last NOTE: "server" in this class means the combination of the scheme, hostname and port. ''' HTTP_CONN = HTTPConnection HTTPS_CONN = HTTPSConnection PROXY_HTTPS_CONN = ProxyHTTPSConnection MAX_CONNECTIONS_PER_SERVER = 2 CONNECTION_TIMEOUT = 300 MAX_CONNECTIONS = 30 def __init__(self): self.pendingRequests = [] self.activeConnectionCount = 0 self.freeConnectionCount = 0 self.connections = { } eventloop.addTimeout(60, self.cleanupPool, 'Check HTTP Connection Timeouts') def _getServerConnections(self, scheme, host, port): key = '%s:%s:%s' % (scheme, host, port) try: return self.connections[key] except KeyError: self.connections[key] = { 'free': set(), 'active': set() } return self.connections[key] def _popPendingRequest(self): '''Try to choose a pending request to process. If one is found, remove it from the pendingRequests list and return it. If not, return None. ''' if self.activeConnectionCount >= self.MAX_CONNECTIONS: return None for i in xrange(len(self.pendingRequests)): req = self.pendingRequests[i] if req['proxy_host']: conns = self._getServerConnections(req['scheme'], req['proxy_host'], req['proxy_port']) else: conns = self._getServerConnections(req['scheme'], req['host'], req['port']) if len(conns['free']) > 0 or len(conns['active']) < self.MAX_CONNECTIONS_PER_SERVER: del self.pendingRequests[i] return req continue return None def _onConnectionClosed(self, conn): conns = self._getServerConnections(conn.scheme, conn.host, conn.port) if conn in conns['active']: conns['active'].remove(conn) self.activeConnectionCount -= 1 elif conn in conns['free']: conns['free'].remove(conn) self.freeConnectionCount -= 1 else: logging.warn('_onConnectionClosed called with connection not in either queue') self.runPendingRequests() def _onConnectionReady(self, conn): conns = self._getServerConnections(conn.scheme, conn.host, conn.port) if conn in conns['active']: conns['active'].remove(conn) self.activeConnectionCount -= 1 else: logging.warn('_onConnectionReady called with connection not in the active queue') if conn not in conns['free']: conns['free'].add(conn) self.freeConnectionCount += 1 else: logging.warn('_onConnectionReady called with connection already in the free queue') self.runPendingRequests() def addRequest(self, callback, errback, requestStartCallback, headerCallback, bodyDataCallback, url, method, headers, postVariables = None, postFiles = None): '''Add a request to be run. The request will run immediately if we have a free connection, otherwise it will be queued. returns a request id that can be passed to cancelRequest ''' proxy_host = None proxy_port = None (scheme, host, port, path) = parseURL(url) if scheme not in ('http', 'https') and host == '' or path == '': errback(MalformedURL(url)) return None if scheme == 'http' and config.get(prefs.HTTP_PROXY_ACTIVE): if config.get(prefs.HTTP_PROXY_HOST) and config.get(prefs.HTTP_PROXY_PORT): proxy_host = config.get(prefs.HTTP_PROXY_HOST) proxy_port = config.get(prefs.HTTP_PROXY_PORT) path = url scheme = config.get(prefs.HTTP_PROXY_SCHEME) if config.get(prefs.HTTP_PROXY_AUTHORIZATION_ACTIVE): username = config.get(prefs.HTTP_PROXY_AUTHORIZATION_USERNAME) password = config.get(prefs.HTTP_PROXY_AUTHORIZATION_PASSWORD) authString = username + ':' + password authString = b64encode(authString) headers['ProxyAuthorization'] = 'Basic ' + authString req = { 'callback': callback, 'errback': errback, 'requestStartCallback': requestStartCallback, 'headerCallback': headerCallback, 'bodyDataCallback': bodyDataCallback, 'scheme': scheme, 'host': host, 'port': port, 'method': method, 'path': path, 'headers': headers, 'postVariables': postVariables, 'postFiles': postFiles, 'proxy_host': proxy_host, 'proxy_port': proxy_port } self.pendingRequests.append(req) self.runPendingRequests() def runPendingRequests(self): '''Find pending requests have a free connection, otherwise it will be queued. ''' while True: req = self._popPendingRequest() if req is None: return None if req['proxy_host']: conns = self._getServerConnections(req['scheme'], req['proxy_host'], req['proxy_port']) else: conns = self._getServerConnections(req['scheme'], req['host'], req['port']) if len(conns['free']) > 0: conn = conns['free'].pop() self.freeConnectionCount -= 1 conn.sendRequest(req['callback'], req['errback'], req['host'], req['port'], req['requestStartCallback'], req['headerCallback'], req['bodyDataCallback'], req['method'], req['path'], req['headers'], req['postVariables'], req['postFiles']) else: conn = self._makeNewConnection(req) conns['active'].add(conn) self.activeConnectionCount += 1 connectionCount = self.activeConnectionCount + self.freeConnectionCount if connectionCount > self.MAX_CONNECTIONS: self._dropAFreeConnection() continue self def _makeNewConnection(self, req): disableReadTimeout = req['postFiles'] is not None def openConnectionCallback(conn): conn.sendRequest(req['callback'], req['errback'], req['host'], req['port'], req['requestStartCallback'], req['headerCallback'], req['bodyDataCallback'], req['method'], req['path'], req['headers'], req['postVariables'], req['postFiles']) def openConnectionErrback(error): if req['proxy_host']: conns = self._getServerConnections(req['scheme'], req['proxy_host'], req['proxy_port']) else: conns = self._getServerConnections(req['scheme'], req['host'], req['port']) if conn in conns['active']: conns['active'].remove(conn) self.activeConnectionCount -= 1 req['errback'](error) if req['scheme'] == 'https' and config.get(prefs.HTTP_PROXY_ACTIVE): connect_scheme = 'proxied_https' else: connect_scheme = req['scheme'] if connect_scheme == 'http': conn = self.HTTP_CONN(self._onConnectionClosed, self._onConnectionReady) elif connect_scheme == 'https': conn = self.HTTPS_CONN(self._onConnectionClosed, self._onConnectionReady) elif connect_scheme == 'proxied_https': conn = self.PROXY_HTTPS_CONN(self._onConnectionClosed, self._onConnectionReady) else: raise AssertionError("Code shouldn't reach here. (connect scheme %s)" % connect_scheme) if req['proxy_host']: (None, None, None, None, eventloop.addIdle)((lambda : conn.openConnection(req['proxy_host'], req['proxy_port'], openConnectionCallback, openConnectionErrback, disableReadTimeout)), 'Open connection %s' % str(self)) else: (None, None, None, None, eventloop.addIdle)((lambda : conn.openConnection(req['host'], req['port'], openConnectionCallback, openConnectionErrback, disableReadTimeout)), 'Open connection %s' % str(self)) return conn def _dropAFreeConnection(self): firstTime = sys.maxint toDrop = None for conns in self.connections.values(): for candidate in conns['free']: if candidate.idleSince < firstTime: toDrop = candidate continue if toDrop is not None: toDrop.closeConnection() def cleanupPool(self): for serverKey in self.connections.keys(): conns = self.connections[serverKey] toRemove = [] for conn in conns['free']: if conn.idleSince is not None and conn.idleSince + self.CONNECTION_TIMEOUT <= clock(): toRemove.append(conn) continue for conn in toRemove: conn.closeConnection() if len(conns['active']) == len(conns['active']): pass elif len(conns['active']) == 0: del self.connections[serverKey] continue eventloop.addTimeout(60, self.cleanupPool, 'HTTP Connection Pool Cleanup') class HTTPClient(object): '''High-level HTTP client object. HTTPClients handle a single HTTP request, but may use several HTTPConnections if the server returns back with a redirection status code, asks for authorization, etc. Connections are pooled using an HTTPConnectionPool object. ''' connectionPool = HTTPConnectionPool() MAX_REDIRECTS = 10 MAX_AUTH_ATTEMPS = 5 def __init__(self, url, callback, errback, headerCallback = None, bodyDataCallback = None, method = 'GET', start = 0, etag = None, modified = None, cookies = None, postVariables = None, postFiles = None): if cookies == None: cookies = { } self.url = url self.callback = callback self.errback = errback self.headerCallback = headerCallback self.bodyDataCallback = bodyDataCallback self.method = method self.start = start self.etag = etag self.modified = modified self.cookies = cookies self.postVariables = postVariables self.postFiles = postFiles self.depth = 0 self.authAttempts = 0 self.updateURLOk = True self.originalURL = self.updatedURL = self.redirectedURL = url self.userAgent = '%s/%s (%s)' % (config.get(prefs.SHORT_APP_NAME), config.get(prefs.APP_VERSION), config.get(prefs.PROJECT_URL)) self.connection = None self.cancelled = False self.initHeaders() def __str__(self): return '%s: %s' % (type(self).__name__, self.url) def cancel(self): self.cancelled = True if self.connection is not None: self.connection.closeConnection() self.connection = None def isValidCookie(self, cookie, scheme, host, port, path): if not time.time() - cookie['received'] < cookie['Max-Age'] and cookie['Version'] == '1' and self.hostMatches(host, cookie['Domain']) and path.startswith(cookie['Path']) and self.portMatches(str(port), cookie['Port']) and scheme == 'https': pass return not cookie['Secure'] def dropStaleCookies(self): '''Remove cookies that have expired or are invalid for this URL''' (scheme, host, port, path) = parseURL(self.url) temp = { } for name in self.cookies: if self.isValidCookie(self.cookies[name], scheme, host, port, path): temp[name] = self.cookies[name] continue self.cookies = temp def hostMatches(self, host, host2): host = host.lower() host2 = host2.lower() if host.find('.') == -1: host = host + '.local' if host2.find('.') == -1: host2 = host2 + '.local' if host2.startswith('.'): return host.endswith(host2) else: return host == host2 def portMatches(self, port, portlist): if portlist is None: return True portlist = portlist.replace(',', ' ').split() return port in portlist def initHeaders(self): self.headers = { } if self.start > 0: self.headers['Range'] = 'bytes=' + str(self.start) + '-' if self.etag is not None: self.headers['If-None-Match'] = self.etag if self.modified is not None: self.headers['If-Modified-Since'] = self.modified self.headers['User-Agent'] = self.userAgent self.setCookieHeader() def setCookieHeader(self): self.dropStaleCookies() if len(self.cookies) > 0: header = '$Version=1' for name in self.cookies: header = '%s;%s=%s' % (header, name, self.cookies[name]['Value']) if self.cookies[name].has_key('origPath'): header = '%s;$Path=%s' % (header, self.cookies[name]['origPath']) if self.cookies[name].has_key('origDomain'): header = '%s;$Domain=%s' % (header, self.cookies[name]['origDomain']) if self.cookies[name].has_key('origPort'): header = '%s;$Port=%s' % (header, self.cookies[name]['origPort']) continue self.headers['Cookie'] = header def startRequest(self): self.cancelled = False self.connection = None self.willHandleResponse = False self.gotBadStatusCode = False if 'Authorization' not in self.headers: (scheme, host, port, path) = parseURL(self.redirectedURL) def callback(authHeader): if self.cancelled: return None if authHeader is not None: self.headers['Authorization'] = authHeader self.reallyStartRequest() httpauth.findHTTPAuth(callback, host.decode('ascii', 'replace'), path.decode('ascii', 'replace')) else: self.reallyStartRequest() def reallyStartRequest(self): if self.bodyDataCallback is not None: bodyDataCallback = self.onBodyData else: bodyDataCallback = None self.connectionPool.addRequest(self.callbackIntercept, self.errbackIntercept, self.onRequestStart, self.onHeaders, bodyDataCallback, self.url, self.method, self.headers, self.postVariables, self.postFiles) def statusCodeExpected(self, status): expectedStatusCodes = set([ 200]) if self.start != 0: expectedStatusCodes.add(206) if self.etag is not None or self.modified is not None: expectedStatusCodes.add(304) return status in expectedStatusCodes def callbackIntercept(self, response): if self.cancelled: print 'WARNING: Callback on a cancelled request for %s' % self.url traceback.print_stack() return None if self.shouldRedirect(response): self.handleRedirect(response) elif self.shouldAuthorize(response): self.handleAuthorize(response) else: self.connection = None if not self.gotBadStatusCode: if self.callback: response = self.prepareResponse(response) trapCall(self, self.callback, response) elif self.errback: error = UnexpectedStatusCode(response['status']) self.errbackIntercept(error) def errbackIntercept(self, error): if self.cancelled: return None elif isinstance(error, PipelinedRequestNeverStarted): self.startRequest() elif isinstance(error, ServerClosedConnection) and self.connection is not None and self.connection.requestsFinished > 0 and self.connection.bytesRead == 0: self.startRequest() else: self.connection = None trapCall(self, self.errback, error) def onRequestStart(self, connection): if self.cancelled: connection.closeConnection() else: self.connection = connection def onHeaders(self, response): if self.shouldRedirect(response) or self.shouldAuthorize(response): self.willHandleResponse = True elif not self.statusCodeExpected(response['status']): self.gotBadStatusCode = True if self.headerCallback is not None: response = self.prepareResponse(response) if not trapCall(self, self.headerCallback, response): self.cancel() def onBodyData(self, data): if not (self.willHandleResponse) and not (self.gotBadStatusCode) and self.bodyDataCallback: if not trapCall(self, self.bodyDataCallback, data): self.cancel() def prepareResponse(self, response): response['original-url'] = self.originalURL response['updated-url'] = self.updatedURL response['redirected-url'] = self.redirectedURL response['filename'] = self.getFilenameFromResponse(response) response['charset'] = self.getCharsetFromResponse(response) try: response['cookies'] = self.getCookiesFromResponse(response) except: print 'ERROR in getCookiesFromResponse()' traceback.print_exc() return response def getCookiesFromResponse(self, response): '''Generates a cookie dictionary from headers in response ''' def getAttrPair(attr): result = attr.strip().split('=', 1) if len(result) == 2: (name, value) = result else: name = result[0] value = '' return (name, value) cookies = { } cookieStrings = [] if response.has_key('set-cookie') or response.has_key('set-cookie2'): (scheme, host, port, path) = parseURL(self.redirectedURL) if response.has_key('set-cookie'): cookieStrings.extend(response['set-cookie'].split(',')) if response.has_key('set-cookie2'): cookieStrings.extend(response['set-cookie2'].split(',')) temp = [] for string in cookieStrings: if len(temp) > 0: if temp[-1].count('"') % 2 == 1 and string.find('=') == -1 or string.find('=') > string.find(';'): temp[-1] = '%s,%s' % (temp[-1], string) continue temp.append(string) cookieStrings = temp for string in cookieStrings: string = string.strip() pairs = string.split(';') temp = [] for pair in pairs: if len(temp) > 0 and temp[-1].count('"') % 2 == 1: temp[-1] = '%s;%s' % (temp[-1], pair) continue temp.append(pair) pairs = temp (name, value) = getAttrPair(pairs.pop(0)) cookie = { 'Value': value, 'Version': '1', 'received': time.time(), 'Path': '/'.join(path.split('/')[:-1]) + '/', 'Domain': host, 'Port': str(port), 'Secure': False } for attr in pairs: attr = attr.strip() if attr.lower() == 'discard': cookie['Discard'] = True continue if attr.lower() == 'secure': cookie['Secure'] = True continue if attr.lower().startswith('version='): cookie['Version'] = getAttrPair(attr)[1] continue if attr.lower().startswith('comment='): cookie['Comment'] = getAttrPair(attr)[1] continue if attr.lower().startswith('commenturl='): cookie['CommentURL'] = getAttrPair(attr)[1] continue if attr.lower().startswith('max-age='): cookie['Max-Age'] = getAttrPair(attr)[1] continue if attr.lower().startswith('expires='): now = time.time() cookieval = getAttrPair(attr)[1].strip() expires = get_cookie_expiration_date(cookieval) expires -= time.timezone if expires < now: cookie['Max-Age'] = 0 else: cookie['Max-Age'] = int(expires - now) expires < now if attr.lower().startswith('domain='): cookie['origDomain'] = getAttrPair(attr)[1] cookie['Domain'] = cookie['origDomain'] continue if attr.lower().startswith('port='): cookie['origPort'] = getAttrPair(attr)[1] cookie['Port'] = cookie['origPort'] continue if attr.lower().startswith('path='): cookie['origPath'] = getAttrPair(attr)[1] cookie['Path'] = cookie['origPath'] continue if not cookie.has_key('Discard'): cookie['Discard'] = not cookie.has_key('Max-Age') if not cookie.has_key('Max-Age'): cookie['Max-Age'] = str(1073741824) if self.isValidCookie(cookie, scheme, host, port, path): cookies[name] = cookie continue return cookies def findValueFromHeader(self, header, targetName): """Finds a value from a response header that uses key=value pairs with the ';' char as a separator. This is how content-disposition and content-type work. """ for part in header.split(';'): try: (name, value) = part.split('=', 1) except ValueError: continue if name.strip().lower() == targetName.lower(): return value.strip().strip('"') continue return None def getFilenameFromResponse(self, response): try: disposition = response['content-disposition'] except KeyError: pass filename = self.findValueFromHeader(disposition, 'filename') if filename is not None: return cleanFilename(filename) return filenameFromURL(util.unicodify(response['redirected-url']), clean = True) def getCharsetFromResponse(self, response): try: contentType = response['content-type'] except KeyError: pass charset = self.findValueFromHeader(contentType, 'charset') if charset is not None: return charset return 'iso-8859-1' def shouldRedirect(self, response): if response['status'] in (301, 302, 303, 307) and self.depth < self.MAX_REDIRECTS: pass return 'location' in response def handleRedirect(self, response): self.depth += 1 self.url = urljoin(self.url, response['location']) self.redirectedURL = self.url if response['status'] == 301 and self.updateURLOk: self.updatedURL = self.url else: self.updateURLOk = False if response['status'] == 303: self.method = 'GET' self.postVariables = None if 'Authorization' in self.headers: del self.headers['Authorization'] self.startRequest() def shouldAuthorize(self, response): if response['status'] == 401 and self.authAttempts < self.MAX_AUTH_ATTEMPS: pass return 'www-authenticate' in response def handleAuthorize(self, response): match = re.search('(\\w+)\\s+realm\\s*=\\s*"(.*?)"$', response['www-authenticate']) if match is None: trapCall(self, self.errback, AuthorizationFailed()) return None authScheme = unicode(match.expand('\\1')) realm = unicode(match.expand('\\2')) if authScheme.lower() != 'basic': trapCall(self, self.errback, AuthorizationFailed()) return None def callback(authHeader): if authHeader is not None: self.headers['Authorization'] = authHeader self.authAttempts += 1 self.startRequest() else: trapCall(self, self.errback, AuthorizationFailed()) httpauth.askForHTTPAuth(callback, self.url, realm, authScheme) def grabURL(url, callback, errback, headerCallback = None, bodyDataCallback = None, method = 'GET', start = 0, etag = None, modified = None, cookies = None, postVariables = None, postFiles = None, defaultMimeType = 'application/octet-stream', clientClass = HTTPClient): if cookies == None: cookies = { } if url.startswith('file://'): path = getFileURLPath(url) try: f = file(path) except EnvironmentError: errback(FileURLNotFoundError(path)) try: data = f.read() except: errback(FileURLReadError(path)) callback({ 'body': data, 'updated-url': url, 'redirected-url': url, 'content-type': defaultMimeType }) else: client = clientClass(url, callback, errback, headerCallback, bodyDataCallback, method, start, etag, modified, cookies, postVariables, postFiles) client.startRequest() return client class HTTPHeaderGrabber(HTTPClient): '''Modified HTTPClient to get the headers for a URL. It tries to do a HEAD request, then falls back on doing a GET request, and closing the connection right after the headers. ''' def __init__(self, url, callback, errback): """HTTPHeaderGrabber support a lot less features than a real HTTPClient, mostly this is because they don't make sense in this context.""" HTTPClient.__init__(self, url, callback, errback) def startRequest(self): self.method = 'HEAD' HTTPClient.startRequest(self) def errbackIntercept(self, error): if self.method == 'HEAD' and not (self.cancelled): self.method = 'GET' HTTPClient.startRequest(self) else: HTTPClient.errbackIntercept(self, error) def callbackIntercept(self, response): if self.method != 'GET' or self.willHandleResponse: HTTPClient.callbackIntercept(self, response) def onHeaders(self, headers): HTTPClient.onHeaders(self, headers) if self.method == 'GET' and not (self.willHandleResponse): headers['body'] = '' self.callback(self.prepareResponse(headers)) self.cancel() def grabHeaders(url, callback, errback, clientClass = HTTPHeaderGrabber): client = clientClass(url, callback, errback) client.startRequest() return client